***Diss File JK
***Created 29/03/2022
***Updated 26/04/2022 

********************************************************************************
*Do file to analyse Talk of Europe data 1999-2013
********************************************************************************

*Set file paths directory
*Every project folder sits under one root, so the root is spelled out only once
local root "/Users/janakonle/Documents/Studium/Dissertation/STATA"
global raw  "`root'/raw"
global data "`root'/data"
global gra  "`root'/graphs"
global tab  "`root'/tables"
global do   "`root'/do"

********************************************************************************
*Empirical Analysis  
********************************************************************************
*(1) Distributions of frequencies of dictionary term use

*Graph of SRWI per term
*Neutral scheme first, so it applies to every graph drawn afterwards
set scheme s1manual
use "$data/mep_srwi_0913.dta", clear

*Horizontal bars of the mean of srwi_term for each value of female,
*one panel per parlterm plus a total panel; the bar values are printed as labels
graph hbar (mean) srwi_term, ///
	over(female) ///
	blabel(bar, size(vsmall) format(%9.0g)) ///
	by(parlterm, total legend(off) ///
		title("Mean of the SRWI per parliamentary term"))

graph export "$gra/srwi_term_by_term_gender.png", replace

*Speeches with dictionary terms per PPG
*Yearly number of speeches by male MEPs with dictionary == 1, counted within
*each party-group family and drawn as one line per family.
use "$data/mep_srwi_0913.dta", clear
drop if female == 1

*"Others" pools the groups that do not get a line of their own
gen byte others = (uen == 1 | ecr == 1 | eurosceptics == 1)

*Count the dictionary speeches directly per family and year.
*The earlier version first built the EP-wide yearly total and then summed that
*total once per MEP row of a group, so the plotted value was
*(yearly total) x (number of MEPs in the group) instead of the group's own count.
*Counting at speech level also avoids picking an arbitrary row per MEP-year
*and yields one point per family and year, which is what -line- needs.
*NOTE(review): assumes the family dummies below are mutually exclusive - confirm.
gen byte dict = 1 if dictionary == 1
collapse (count) favspeech_all = dict, ///
	by(year conservatives social_democrats liberals_centrists greens gue_ngl independent others)

*collapse leaves the data sorted by year first, so each line runs left to right
twoway line favspeech_all year if conservatives == 1, lpattern(solid) || ///
	line favspeech_all year if social_democrats == 1, lpattern(dash) || ///
	line favspeech_all year if liberals_centrists == 1, lpattern(dot) || ///
	line favspeech_all year if greens == 1, lpattern(dash_dot) || ///
	line favspeech_all year if gue_ngl == 1, lpattern(longdash) || ///
	line favspeech_all year if independent == 1, lpattern(longdash_dot) || ///
	line favspeech_all year if others == 1, lpattern(shortdash) ///
	ytitle("Frequency", size(medium)) ///
	xlab(2000 2002 2004 2006 2008 2010 2012) ///
	title("Translated SRWI Speeches by male MEPs by PPG", size(medium)) ///
	legend(order(1 "Conservatives" 2 "Social Democrats" 3 "Liberals and Centrists" ///
	4 "Greens/EFA" 5 "European Left" 6 "Independent" 7 "Others")) ///
	xtitle("Year", size(medium))

graph export "$gra/favspeech_ppgs.png", replace

*Set graph scheme to neutral
set scheme s1manual

*Translated SRWI speeches overall
*Reduce the data to one row per year holding the number of speeches with dictionary == 1
use "$data/mep_srwi_0913.dta", clear
gen byte dict = (dictionary == 1)
collapse (sum) favspeech_total = dict, by(year)

*Quick looks at the yearly series (not exported)
tsset year
twoway (tsline favspeech_total)
scatter favspeech_total year

*Exported version of the yearly series
twoway (line favspeech_total year, lpattern(solid)), ///
	title("Translated SRWI Speeches between 1999-2013", size(large)) ///
	xtitle("Year", size(medium)) ///
	ytitle("Frequency", size(medium)) ///
	xlabel(2000 2002 2004 2006 2008 2010 2012)

graph export "$gra/favspeech_per_year_total.png", replace

*SRWI Speeches and SRWI per term male MEPs
*Builds, for male MEPs, a yearly count of dictionary speeches and a per-term
*average of the yearly summed srwi_year, then plots both on separate y axes.
use "$data/mep_srwi_0913.dta", clear
keep if female != 1

*Dictionary speeches per MEP and year, then one row per MEP-year
egen favspeech_year = total(dictionary == 1), by(name year)
keep name year favspeech_year srwi_year date
bysort name year: keep if _n == _N
*NOTE(review): which row (and hence which date) survives within an MEP-year is
*not pinned down by the sort, so years that straddle a term boundary may land in either term.

*Yearly totals over all remaining MEP-year rows
egen favspeech_total = total(favspeech_year), by(year)
egen srwi_total = total(srwi_year), by(year)

*Term indicator from the row's date: 5 before 20jul2004, 6 up to 13jul2009, 7 afterwards
*(a missing date compares as larger than any date and therefore falls into 7)
gen byte ep_term = 5 + (date >= td(20jul2004)) + (date >= td(14jul2009))
egen p_srwi = mean(srwi_total), by(ep_term)

*One row per year with the two plotted series
keep year favspeech_total p_srwi
bysort year: keep if _n == _N

twoway (line favspeech_total year, lpattern(solid)) ///
	(line p_srwi year, lpattern(dash_dot) yaxis(2)), ///
	ytitle("Frequency", size(small)) ///
	ytitle("SRWI Speeches/Speeches overall per term", axis(2) size(small)) ///
	xtitle("Year", size(small)) ///
	xlabel(2000 2002 2004 2006 2008 2010 2012) ///
	title("SRWI Speeches and SRWI per term by male MEPs", size(medium)) ///
	legend(order(1 "SRWI speeches" 2 "SRWI per term"))

graph export "$gra/srwi_favspeech_per_term_male.png", replace

********************************************************************************
*(2) Descriptive analysis
use "$data/mep_srwi_0913.dta", clear

*Terms used by male vs female MEPs
*Each table is restricted to the rows where the respective indicator equals 1
foreach term in dv ch lm cj wh rep dictionary {
	tab `term' female if `term' == 1
}

*Number of MEPs and speeches
*Distinct values are counted by flagging the first row of every by-group
bysort name: gen byte first_row = (_n == 1)
quietly count if first_row
display "Number of MEPs in overall sample is: " r(N)
/*Number of MEPs in overall sample is: 1592*/
drop first_row

bysort speechnr: gen byte first_row = (_n == 1)
quietly count if first_row
display "Number of Speeches in overall sample is: " r(N)
/*Number of speeches in overall sample is: 159881*/
drop first_row

*Restrict to male MEPs with a known age; the data stay in memory in this state
keep if female != 1
keep if !missing(age)

bysort name: gen byte first_row = (_n == 1)
quietly count if first_row
display "Number of male MEPs in overall sample is: " r(N)
/*Number of male MEPs in overall sample is: 1078*/
drop first_row

bysort speechnr: gen byte first_row = (_n == 1)
quietly count if first_row
display "Number of Speeches by male MEPs in overall sample is: " r(N)
/*Number of speeches by male MEPs in overall sample is: 105865*/
drop first_row

********************************************************************************
*(3) Scatter Analysis
*No data set is loaded here: the plots use whatever section (2) left in memory

*Quick looks with a fitted line (not exported)
foreach share in women_ep women_ppg {
	twoway (scatter srwi_term `share') (lfit srwi_term `share')
}

*(3.1) EP

twoway (scatter srwi_term women_ep), ///
	title("Change in SRWI with the proportion of women in the EP", size(medium)) ///
	ytitle("SRWI by male MEPs per parliamentary term", size(small)) ///
	xtitle(, size(small))

graph export "$gra/srwi_term_women_ep.png", replace

*(3.2) PPG

twoway (scatter srwi_term women_ppg), ///
	title("Change in SRWI with the proportion of women in PPGs", size(medium)) ///
	ytitle("SRWI by male MEPs per parliamentary term", size(small)) ///
	xtitle(, size(small))

graph export "$gra/srwi_term_women_ppg.png", replace

********************************************************************************
*(4) Regression Analysis
*Two-part set-up on male MEP-year rows: a logistic model for whether srwi_term
*is non-zero and linear models for the log of srwi_term, followed by
*residual plots and polynomial checks on the women_ep / women_ppg shares.
use "$data/mep_srwi_0913.dta", clear
drop if female == 1
drop if age ==.

*asdoc must be available before its first use; install it only when it is missing
*so that the do-file does not need a network connection on every run
capture which asdoc
if _rc {
	ssc install asdoc, replace
}

*Shared pieces of every model below (globals, so they survive running the file in parts)
global srwi_controls women_np women_del conservative_ideology social_democrats liberals_centrists greens gue_ngl femm av_mep_dur av_age north west south leg_quota vol_party_quota
global srwi_years year_2000 year_2001 year_2002 year_2003 year_2004 year_2005 year_2006 year_2007 year_2008 year_2009 year_2010 year_2011 year_2012 year_2013
global srwi_estopts vce(cluster name) cformat(%9.3f) pformat(%5.3f) sformat(%8.3f)

*Logistic Regression
*Indicator is left missing when srwi_term is missing; "srwi_term != 0" alone
*would code those rows as 1 because a missing value is not equal to 0
gen byte srwi_at_all = (srwi_term != 0) if !missing(srwi_term)
label var srwi_at_all "SRWI at all?"
label define srwiall 0 "No" 1 "Yes"
label values srwi_at_all srwiall

/* 407 MEPs (male) 76,340 speeches SRWI*/

*Averages of seniority and age within MEP-year
bysort name year: egen av_mep_dur = mean(mep_dur)
bysort name year: egen av_age = mean(age)

keep name year srwi_year date partyname speechnr parlterm dictionary femm birthday ///
	women_ep av_age av_mep_dur femm_dur parlterm_5 parlterm_6 parlterm_7 ///
	country north ceneast south west PPG women_ppg ///
	conservatives social_democrats liberals_centrists uen ecr greens gue_ngl ///
	eurosceptics independent conservative_ideology ///
	women_del women_np leg_quota vol_party_quota quotasize ///
	year_1999 year_2000 year_2001 year_2002 year_2003 year_2004 year_2005 year_2006 ///
	year_2007 year_2008 year_2009 year_2010 year_2011 year_2012 year_2013 ///
	favspeech srwi favspeech_term srwi_term age mep_dur srwi_at_all

*One row per MEP-year. Ordering by date and speechnr inside the group makes the
*kept row (the latest speech of the year) the same on every run; without it
*the surviving row, and every variable that varies within an MEP-year, is arbitrary.
bysort name year (date speechnr): keep if _n == _N

*Set graph scheme to neutral
set scheme s1manual
histogram srwi_term
graph export "$gra/histogram_srwitermmale.png", replace

*Logistic regression
*Each asdoc table gets its own file in the current working directory; with the
*default file name every "replace" overwrote the previous table
asdoc logistic srwi_at_all women_ep women_ppg $srwi_controls $srwi_years, $srwi_estopts replace save(logit_main.doc)

histogram srwi_term if srwi_at_all == 1

*log transformation
*log() of 0 is missing, so MEP-years without any SRWI drop out of the linear models
gen ln_srwi_term = log(srwi_term)
label var ln_srwi_term "Log of srwi_term"

*Linear regression
asdoc reg ln_srwi_term women_ep women_ppg $srwi_controls $srwi_years, $srwi_estopts replace save(ols_log_main.doc)

*Residual vs fitted values Plots:
regress srwi_term women_ep women_ppg $srwi_controls $srwi_years

rvfplot, yline(0)
graph export "$gra/residuals_nolog.png", replace

regress ln_srwi_term women_ep women_ppg $srwi_controls $srwi_years

rvfplot, yline(0)
graph export "$gra/residuals_log.png", replace

*Polynomial testing
*EP: linear vs linear + squared share, nested in one table
asdoc reg ln_srwi_term women_ep women_ppg $srwi_controls $srwi_years, $srwi_estopts nest replace save(poly_ep.doc)

gen women_ep2 = women_ep*women_ep
asdoc reg ln_srwi_term women_ep women_ep2 women_ppg $srwi_controls $srwi_years, $srwi_estopts nest append save(poly_ep.doc)

*PPGs: linear vs linear + squared share, nested in one table
*NOTE(review): these two models use srwi_term, the EP pair above uses ln_srwi_term - confirm this is intended
asdoc reg srwi_term women_ep women_ppg $srwi_controls $srwi_years, $srwi_estopts nest replace save(poly_ppg.doc)

gen women_ppg2 = women_ppg*women_ppg
asdoc reg srwi_term women_ep women_ppg women_ppg2 $srwi_controls $srwi_years, $srwi_estopts nest append save(poly_ppg.doc)

*cubic
*NOTE(review): both logistic models enter only the higher-order term without
*women_ep (and women_ep2); confirm whether the lower-order terms should be included
gen women_ep3 = women_ep2*women_ep
asdoc logistic srwi_at_all women_ep2 women_ppg $srwi_controls $srwi_years, $srwi_estopts replace save(logit_ep_squared.doc)

*women_ppg3 is created but not used by any model in this section
gen women_ppg3 = women_ppg2*women_ppg
*conservative_ideology is used here like in every other model (was: conservatives)
asdoc logistic srwi_at_all women_ep3 women_ppg $srwi_controls $srwi_years, $srwi_estopts replace save(logit_ep_cubed.doc)

use "$data/mep_srwi_0913.dta", clear
drop if female == 1
drop if age ==.

*Reloading the data discards every variable generated for the main models, so
*the ones the robustness models need are rebuilt here in the same way:
*non-zero indicator, MEP-year averages, one row per MEP-year, log outcome.
gen byte srwi_at_all = (srwi_term != 0) if !missing(srwi_term)
label var srwi_at_all "SRWI at all?"
bysort name year: egen av_mep_dur = mean(mep_dur)
bysort name year: egen av_age = mean(age)
*Ordering by date and speechnr inside the group keeps the same row on every run
bysort name year (date speechnr): keep if _n == _N
*log() of 0 is missing, so MEP-years without any SRWI drop out of the linear models
gen ln_srwi_term = log(srwi_term)
label var ln_srwi_term "Log of srwi_term"

*asdoc is required below; install it only when it is missing
capture which asdoc
if _rc {
	ssc install asdoc, replace
}

*Shared pieces of the robustness models (globals, so they survive running the file in parts)
global srwi_controls women_np women_del conservative_ideology social_democrats liberals_centrists greens gue_ngl femm av_mep_dur av_age north west south leg_quota vol_party_quota
global srwi_years year_2000 year_2001 year_2002 year_2003 year_2004 year_2005 year_2006 year_2007 year_2008 year_2009 year_2010 year_2011 year_2012 year_2013
global srwi_estopts vce(cluster name) cformat(%9.3f) pformat(%5.3f) sformat(%8.3f)

********************************************************************************
*(5) Robustness test includes the proportion of female MEPs in the subsequent legislative term (t+1) as its main independent variable to test whether male MEPs anticipate an increase in the number of female MEPs after the next election (Höhmann 2020)

*EP
*Share assigned by the date of the row: before 20jul2004, up to 13jul2009, afterwards
gen share_women_ep_next = 31.21 if date < td(20jul2004)
replace share_women_ep_next = 36.1 if date >= td(20jul2004) & date < td(14jul2009)
replace share_women_ep_next = 36.62 if date >= td(14jul2009)
label var share_women_ep_next "Share of women in EP at t+1"

*Each asdoc table gets its own file in the current working directory; with the
*default file name every "replace" overwrote the previous table
asdoc logistic srwi_at_all share_women_ep_next women_ppg $srwi_controls $srwi_years, $srwi_estopts replace save(robust_ep_logit.doc)

asdoc reg ln_srwi_term share_women_ep_next women_ppg $srwi_controls $srwi_years, $srwi_estopts replace save(robust_ep_ols.doc)

*PPGs
*Shares are filled in per group label and date window; rows matching no rule stay missing
gen share_women_ppg_next =. 
label var share_women_ppg_next "Share of women in PPG at t+1"

*EPP-ED & EPP
replace share_women_ppg_next = 23.13 if date < td(10aug2004) & PPG == "EPP-ED"
replace share_women_ppg_next = 24.31 if date >= td(10aug2004) & date < td(25mar2009) & PPG == "EPP-ED"
replace share_women_ppg_next = 33.58 if date >= td(25mar2009) & date < td(20jul2009) & (PPG == "EPP-ED" | PPG == "EPP")
replace share_women_ppg_next = 31.22 if PPG == "EPP" & date >= td(20jul2009)

*PSE & S&D
replace share_women_ppg_next = 39.9 if date < td(10aug2004) & PPG == "PSE"
replace share_women_ppg_next = 41.47 if date >= td(10aug2004) & date < td(25mar2009) & PPG == "PSE"
replace share_women_ppg_next = 40.22 if date >= td(25mar2009) & date < td(20jul2009) & (PPG == "PSE" | PPG == "S&D")
replace share_women_ppg_next = 45.55 if PPG == "S&D" & date >= td(20jul2009)

*ELDR & ALDE
replace share_women_ppg_next = 38.64 if date < td(10aug2004) & PPG == "ELDR"
replace share_women_ppg_next = 40 if date >= td(10aug2004) & date < td(25mar2009) & (PPG == "ELDR" | PPG == "ALDE")
replace share_women_ppg_next = 45.24 if date >= td(25mar2009) & date < td(20jul2009) & (PPG == "ALDE")
replace share_women_ppg_next = 38.31 if PPG == "ALDE" & date >= td(20jul2009)

*UEN
replace share_women_ppg_next = 18.52 if date < td(10aug2004) & PPG == "UEN"
replace share_women_ppg_next = 11.36 if date >= td(10aug2004) & date < td(25mar2009) & PPG == "UEN" 
*Kept for documentation: the value stays missing for UEN in this window
replace share_women_ppg_next =. if date >= td(25mar2009) & date < td(20jul2009) & (PPG == "UEN")

*ECR 
replace share_women_ppg_next = 21.43 if PPG == "ECR" & date >= td(20jul2009)

*Greens/EFA
replace share_women_ppg_next = 47.62 if date < td(10aug2004) & PPG == "Greens/EFA"
replace share_women_ppg_next = 46.51 if date >= td(10aug2004) & date < td(25mar2009) & PPG == "Greens/EFA"
replace share_women_ppg_next = 54.55 if date >= td(25mar2009) & date < td(20jul2009) & PPG == "Greens/EFA"
replace share_women_ppg_next = 42 if PPG == "Greens/EFA" & date >= td(20jul2009)

*GUE/NGL
replace share_women_ppg_next = 29.27 if date < td(10aug2004) & PPG == "GUE/NGL"
replace share_women_ppg_next = 28.57 if date >= td(10aug2004) & date < td(20jul2009) & PPG == "GUE/NGL"
replace share_women_ppg_next = 50 if PPG == "GUE/NGL" & date >= td(20jul2009)

*Eurosceptics (EDD, IND/DEM, EFD)
replace share_women_ppg_next = 8.11 if date < td(10aug2004) & PPG == "EDD"
replace share_women_ppg_next = 18.18 if date >= td(10aug2004) & date < td(25mar2009) & PPG == "IND/DEM"
replace share_women_ppg_next = 15.63 if date >= td(25mar2009) & date < td(20jul2009) & PPG == "IND/DEM"
replace share_women_ppg_next = 37.5 if PPG == "EFD" & date >= td(20jul2009)

*Independent (Non-attached & TDI)
replace share_women_ppg_next = 22.22 if date < td(10aug2004) & PPG == "Technical Group of Independent Members"
replace share_women_ppg_next = 24.14 if date < td(10aug2004) & PPG == "NI - non-attached members"
replace share_women_ppg_next = 13.33 if date >= td(10aug2004) & date < td(25mar2009) & PPG == "NI - non-attached members"
replace share_women_ppg_next = 18.52 if date >= td(25mar2009) & date < td(20jul2009) & PPG == "NI - non-attached members"
replace share_women_ppg_next = 28.85 if PPG == "NI - non-attached members" & date >= td(20jul2009)

*Missing-value check written to the log instead of opening the data browser,
*which would interrupt a non-interactive run
count if missing(women_ppg)
tab PPG if missing(share_women_ppg_next), missing

asdoc logistic srwi_at_all share_women_ppg_next women_ep $srwi_controls $srwi_years, $srwi_estopts replace save(robust_ppg_logit.doc)

asdoc reg ln_srwi_term share_women_ppg_next women_ep $srwi_controls $srwi_years, $srwi_estopts replace save(robust_ppg_ols.doc)

